### Written by Joseph Briggs (Board alum) for the Business Cycles paper
### Formatted to be used before tempdisagg was implemented

print("RUNNING KALMAN FILTER")

# Functions ---------------------------------------------------------------

sigmoid=function(XX) {
  out=1/(1+exp(-XX))
  return(out)
}


kalman <- function(y,X,interp_dates) {
  
  X <- select(X, -date)
  X <- as.matrix(X, ncol = length(X))
  
  NX=ncol(X)
  
  ylen = length(y)
  
  k <<- 12
  
  vec1 <<- which(!is.na(y)) # where we have SCF observations 
  
  vec2 <<- seq(1,ylen,k) # where there "should" be SCF observations 
  scf_expected <<- length(vec2)
  
  scf_present_ind <<- which( vec2 %in% vec1)
  
  forecast_ind <<- which(!(vec2 %in% vec1))
  
  num_forecast <<- length(forecast_ind)
  
  
  
  # Nt <<- nrow(X)-(1 + num_forecast)
  Nt <<- nrow(X)-(1 + num_forecast * k)
  
  # zhat = c(rep(0,nrow(X)))
  # betahat = NA
  # u.hat = NA
  # a = NA
  # a2 = NA
  # A = NA
  
  y_in <- y
  y_in[is.na(y_in)] <- 0
  
  y <- y[!is.na(y)]
  y <- y[scf_present_ind]
  
  #Initialize model
  
  beta_0=solve(t(X[ seq(1,  Nt+1,k), ]) %*% X[ seq(1,  Nt+1,k  ), ], tol=1e-36) %*% t(X[ seq(1,  Nt+1,k  ), ]) %*% y
  a_0=0
  sigeta_0 = sd(y-X[ seq(1,  Nt+1,k  ), ]%*%beta_0)
  phi_0=0
  theta_0=c(beta_0, a_0, log(sigeta_0*3))
  lower0=c(-2*abs(beta_0), -5, log(sigeta_0*.01) )
  upper0=c(2*abs(beta_0), 5, log(sigeta_0*10) )
  
  f=function(theta) ksmoother(y_in, X,k,  theta[1:NX], sigmoid(theta[NX+1]), phi_0, exp(theta[NX+2]))[1]
  #MLE estimation
  
  output=optim(theta_0, f, method="L-BFGS-B", lower=lower0, upper=upper0)
  
  #store output
  output2=ksmoother(y_in, X,k, beta = output$par[1:NX], a = sigmoid(output$par[NX+1]), phi = phi_0, sigma_eta = exp(output$par[NX+2]))
  zhat=unlist(output2[2])
  se=unlist(output2[3])^.5
  
  kal_output <- list(zhat, se)
  names(kal_output) <- c("zhat", "se")
  return(kal_output)
}







ksmoother=function(y, X,k, beta, a, phi , sigma_eta) {
  # Nt=nrow(X)-1
  
  Nt <- nrow(X)
  y_in <- y
  
  scf_present_ind_long <- scf_present_ind * 12 -11
  #create sequence of observed y's
  #Control inputs generated 
  beta_X=t(X%*%beta)
  #Adjust observation to account for controls.
  
  y_in[scf_present_ind_long] <- y_in[scf_present_ind_long] - beta_X[scf_present_ind_long]
  
  if (phi!=0){
    A=diag(2)
    A[1,1]=phi
    A[1,2]=a
    A[2,2]=a
    #
    Q=matrix(0, nrow=2, ncol=2)
    Q[1,1]=sigma_eta^2
    Q[1,2]=sigma_eta^2
    Q[2,1]=sigma_eta^2
    Q[2,1]=sigma_eta^2 
    # Expected state vector defined.
    state=matrix(0, 2, Nt+1)
    state[,1]=y[1]
    covar=list()
    covar[[1]]=matrix(0, 2, 2)
    covar[[1]][2, 2]=sigma_eta^2
    #
    Var_next=list()
    state_next=state
    final_state=matrix(0, 2, Nt)
    final_covar=list()
    # Create matrix of observation indicators.
    B=matrix(0,2, Nt )
    B[1, scf_present_ind_long]=1
  }
  
  if (phi==0){
    A=matrix(a,1,1) 
    Q=matrix(sigma_eta^2, 1,1)
    # Expected state vector defined.
    state=matrix(0, 1, Nt)
    # state[,1]=0
    covar=list()
    covar[[1]]=matrix(0, 1, 1)
    covar[[1]][1, 1]=sigma_eta^2
    # 
    Var_next=list()
    state_next=state
    final_state=matrix(0, 1, Nt)
    final_covar=list()
    # Create matrix of observation indicators.
    B=matrix(0,1, Nt)
    B[1, scf_present_ind_long]=1
  }
  
  # tt <- 1
  #Kalman filter step
  for (tt in 1:(Nt-1)) {
    Btt=matrix(B[,tt+1],1,length(B[,tt+1]))
    state_next[,tt+1]=A%*%state[,tt]
    Var_next[[tt+1]]=A%*%covar[[tt]]%*%t(A)+Q
    if (sum(Btt)>0){
      Innovation=y_in[tt+1]-Btt%*%state_next[,tt+1]
      Innovation_covar=Btt%*%Var_next[[tt+1]]%*%t(Btt)
      Kalman_Gain=Var_next[[tt+1]]%*%t(Btt)%*%solve(Innovation_covar, tol=1e-36)
      state[,tt+1]=state_next[,tt+1]+Kalman_Gain%*%Innovation
      #covar[[tt+1]]=Var_next[[tt+1]]-Kalman_Gain%*%Btt%*%Var_next[[tt+1]]
      covar[[tt+1]]=Var_next[[tt+1]]-Kalman_Gain%*%Innovation_covar%*%t(Kalman_Gain)
    } else {
      state[,tt+1]=state_next[,tt+1]
      covar[[tt+1]]=Var_next[[tt+1]]
    }
  }
  #
  final_state[Nt]=state[Nt]
  final_covar[[Nt]]=covar[[Nt]]
  # Kalman Smoother Step
  for (tt in (Nt-1):1) {
    Back_var=covar[[tt]]%*%t(A)%*%solve(Var_next[[tt+1]], tol=1e-36)
    final_state[,tt]=state[,tt]+Back_var%*%(final_state[,tt+1]-state_next[,tt+1])
    final_covar[[tt]]=covar[[tt]]+Back_var%*%(final_covar[[tt+1]]-Var_next[[tt+1]])%*%t(Back_var)
  }
  #Calculate likelihood
  if (phi==0){
    ll=-Nt/2*log(2*pi)-1/2*log(sigma_eta^2/(1-a^2))-(1-a^2)/(2*sigma_eta^2)*(final_state[1])^2-(Nt-1)/2*log(sigma_eta^2)
    for (tt in 2:(Nt-1)) {  
      ll=ll-1/(2*sigma_eta^2)*(final_state[tt]-a*final_state[tt-1])^2  
    }
  }
  #
  final_state[1,]=final_state[1,]+beta_X
  
  ll=-unlist(ll)
  #print(ll)
  return(list(ll, final_state, final_covar))
}


# Interpolation/extrapolation ---------------------------------------------

scf_data_long <- scf_data_list

scf_data_long <- lapply(scf_data_long, function(df) df %>% complete(date = interpqtrs))

scf_data_imputed <- list()
scf_data_disagg <- list()
se <- list() 

for (gr in names(scf_data_list)) {
  for (line in names(series)) {
    
    scf_data_imputed[[gr]][["date"]] <- scf_data_long[[gr]][["date"]]
    scf_data_imputed[[gr]][[line]] <- kalman(y = scf_data_long[[gr]][[line]], X = indicators_list[[gr]][[line]], interp_dates = scf_data_long[[gr]]$date)
  
    scf_data_disagg[[gr]][["date"]] <- scf_data_long[[gr]][["date"]]
    scf_data_disagg[[gr]][[line]] <- scf_data_imputed[[gr]][[line]]

    se[[gr]][["date"]] <- scf_data_long[[gr]][["date"]]
    se[[gr]][[line]] <- scf_data_imputed[[gr]][[line]][["se"]]
    scf_data_imputed[[gr]][[line]] <- scf_data_imputed[[gr]][[line]][["zhat"]]

  }
  
  scf_data_imputed[[gr]] <- bind_cols(scf_data_imputed[[gr]])
  se[[gr]] <- bind_cols(se[[gr]])
  
}


se <- bind_rows(se, .id = "cat") %>%
  complete(date, cat) %>% # if a certain group does not exist for a particular date, add it in
  mutate(cat = factor(cat, levels = groupnames)) %>%
  arrange(date, cat) %>%
  filter(date <= as.yearqtr(lastqtr))

write_csv(se, path = file.path(output_directory, dfa_file_names(item = "se", group = group, agg_level = agg_level,  method = "kalman", filetag = filetag, scf_ignore = scf_ignore)))
